A$X = seq(1,nrow(A))
Neigh = A %>% group_by(A.A7_Area.Neighborhood) %>% summarize(N = n()) %>% data.frame() #Make Neighborhood data from Household data
A$Neigh_N = Neigh[match(A$A.A7_Area.Neighborhood, Neigh$A.A7_Area.Neighborhood),'N'] #Just use this to put number of people in neighborhood back in Household dataframe
rm(Neigh)

A = subset(A, ! A$Neigh_N < 20) #7452 -> 7380; when na neighborhoods dropped, we have 7258 vs 7330

#drop na neighborhoods
A=subset(A, !is.na(A$A.A7_Area.Neighborhood))
A=subset(A, A.A7_Area.Neighborhood != 'Bangalore NA')

#clean variables
A$L.L72_Party.performance[which(! A$L.L72_Party.performance %in% c(1,2,3,4))] = NA
A$L.L29_Help.each.other[which(A$L.L29_Help.each.other > 4)] = NA
A$L.L32_Serious.Disagreement[which(A$L.L32_Serious.Disagreement  > 4)] = NA
A$L.L35_Typically.resolved[which(A$L.L35_Typically.resolved > 4)] = NA
A$L.L38_Community.meeting[which(A$L.L38_Community.meeting > 2)]  = NA

############################################################################################################

soc_vars_3 = c('L.L28_Community.work','L.L32_Serious.Disagreement','L.L38_Community.meeting')

set.seed(1000)
ind_pca_3 = prcomp( ~ L.L28_Community.work+L.L32_Serious.Disagreement+
                      L.L38_Community.meeting, data = A, scale = T)

#RESULTS FOR TABLE B1 (created manually)
summary(ind_pca_3)

#FIGURE B1
x_temp = c(1,2,3)
plot(x_temp, ind_pca_3$sdev, pch = 16, main = 'PCA Scree Plot',
     xlab = 'PCA Component', ylab = 'Standard Deviation')
#saved via RStudio dialog; 600x600

#Create new social density metric from weighted average of first two components
PropVar = (ind_pca_3[[1]]^2)/sum(ind_pca_3[[1]]^2)
SocDens2 = - ( ind_pca_3$x[,1]*PropVar[1] - ind_pca_3$x[,2]*PropVar[2] ) / ( PropVar[1] + PropVar[2] )  #is there a principled reason for SUBTRACTING?

#add to main data frame
A[which(complete.cases(A[,soc_vars_3])),'SocDens'] = -ind_pca_3$x[,1]
A[which(complete.cases(A[,soc_vars_3])),'SocDens2'] = SocDens2; rm(SocDens2)

rm(PropVar, soc_vars_3, ind_pca_3)

#Let's get a qualitative sense of what the PCA scores mean in terms of responses
#summary(factor(A$L.L28_Community.work[which(A$SocDens > 2)])) #mode is 10
#median(A$L.L28_Community.work[which(A$SocDens > 2)]) #median is 10
#summary(factor(A$L.L28_Community.work[which(A$SocDens < -2)]))
#median(A$L.L28_Community.work[which(A$SocDens < -2)]) #median is 0
#sd(A$SocDens, na.rm = T) #1.04638

###############################################################################################################
####Clean and create variables for list experiment ####################################
A$L.SE2_1.L74[which(A$L.SE2_1.L74 > 3)] = NA
A$L.SE2_2.L75[which(A$L.SE2_2.L75 > 4)] = NA
A$L.SE2_3.L76[which(A$L.SE2_3.L76 > 4)] = NA

#drop respondents who have NA for all
A = A[-which(is.na(A[,'L.SE2_1.L74']) & is.na(A[,'L.SE2_2.L75']) & is.na(A[,'L.SE2_3.L76']) ), ]

A$ListOut = unlist(apply(A[,c('L.SE2_1.L74','L.SE2_2.L75','L.SE2_3.L76')], 1, function(x) x[which(!is.na(x))])) #whatever list response isn't NA
A$ListTreat = unlist(apply(A[,c('L.SE2_1.L74','L.SE2_2.L75','L.SE2_3.L76')], 1, function(x) which( !is.na(x) ) ) ) #which list did they get (1,2,3)
A$ListControl = as.numeric( A$ListTreat == 1 )
A$ListLeader = as.numeric( A$ListTreat == 2 )
A$ListFavor = as.numeric( A$ListTreat == 3 )

#Create new variables for whether social density is above median (for both measures) 
A[which(A$SocDens > median(A$SocDens, na.rm=T)),'SocDensHi'] = 1
A[which(A$SocDens <= median(A$SocDens, na.rm=T)),'SocDensHi'] = 0
A[which(A$SocDens > median(A$SocDens2, na.rm=T)),'SocDens2Hi'] = 1
A[which(A$SocDens <= median(A$SocDens2, na.rm=T)),'SocDens2Hi'] = 0

#Clean control variables#######################
A$Age = as.numeric(A$C.C4_Age)
A$Male = as.numeric(A$C.C5_Gender == 2)
A$C.C8_Caste = factor(mapvalues(A$C.C8_Caste, from = '888', to = NA))
A$C.C10_Mother.Tongue. = factor(mapvalues(A$C.C10_Mother.Tongue., from = c('888','999'), to = c(NA,NA) ) )
A$C.C50_Years.Of.Schooling.[which(A$C.C50_Years.Of.Schooling. > 22)] = NA

A$GenCaste = as.numeric(A$C.C8_Caste == 1)
A$Hindu = as.numeric(A$C.C6_Religion == 'Hindu')